{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": "# 7 层级索引（hierarchical indexing）（机器学习，深度学习不重要，直接跳过不讲）"
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cloth  size\n",
      "a      0       0.315327\n",
      "       1       0.700162\n",
      "       2      -0.647499\n",
      "b      0      -0.938938\n",
      "       1       1.278033\n",
      "       2      -1.203033\n",
      "c      0       0.933371\n",
      "       1       0.559600\n",
      "       2       1.064502\n",
      "d      0       0.709540\n",
      "       1       0.760230\n",
      "       2       0.676333\n",
      "dtype: float64\n",
      "<class 'pandas.core.series.Series'>\n",
      "<class 'pandas.core.indexes.multi.MultiIndex'>\n",
      "MultiIndex([('a', 0),\n",
      "            ('a', 1),\n",
      "            ('a', 2),\n",
      "            ('b', 0),\n",
      "            ('b', 1),\n",
      "            ('b', 2),\n",
      "            ('c', 0),\n",
      "            ('c', 1),\n",
      "            ('c', 2),\n",
      "            ('d', 0),\n",
      "            ('d', 1),\n",
      "            ('d', 2)],\n",
      "           names=['cloth', 'size'])\n",
      "[['a', 'b', 'c', 'd'], [0, 1, 2]]\n"
     ]
    },
    {
     "data": {
      "text/plain": "FrozenList([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "#MultiIndex是层级索引，索引类型的一种\n",
    "index1 = pd.MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd'],\n",
    "                [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]], names=['cloth', 'size'])\n",
    "\n",
    "ser_obj = pd.Series(np.random.randn(12),index=index1)\n",
    "print(ser_obj)\n",
    "print(type(ser_obj)) #Series\n",
    "print(type(ser_obj.index)) #索引类型，MultiIndex\n",
    "print(ser_obj.index)\n",
    "print(ser_obj.index.levels) #层级索引的索引值\n",
    "ser_obj.index.codes  #没那么重要，代表索引的位置\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:45:51.402101800Z",
     "start_time": "2024-04-11T06:45:48.937068700Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "data": {
      "text/plain": "cloth  size\na      0       0.315327\n       1       0.700162\n       2      -0.647499\nb      0      -0.938938\n       1       1.278033\n       2      -1.203033\nc      0       0.933371\n       1       0.559600\n       2       1.064502\nd      0       0.709540\n       1       0.760230\n       2       0.676333\ndtype: float64"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ser_obj"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T06:49:12.328857400Z",
     "start_time": "2024-04-11T06:49:12.288882600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "size\n",
      "0    0.933371\n",
      "1    0.559600\n",
      "2    1.064502\n",
      "dtype: float64\n",
      "--------------------------------------------------\n",
      "-0.6474985639664045\n",
      "--------------------------------------------------\n",
      "cloth\n",
      "a   -0.647499\n",
      "b   -1.203033\n",
      "c    1.064502\n",
      "d    0.676333\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "#层级索引如何取数据\n",
    "print('-'*50)\n",
    "print(ser_obj['c']) #取出c的所有数据，取出的是series\n",
    "print('-'*50)\n",
    "print(ser_obj['a', 2])\n",
    "print('-'*50)\n",
    "print(ser_obj[:, 2]) #取出所有行的内层索引为2的数据"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:49:46.973048Z",
     "start_time": "2024-04-11T06:49:46.940006600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 交换层级"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "size  cloth\n",
      "0     a        0.315327\n",
      "1     a        0.700162\n",
      "2     a       -0.647499\n",
      "0     b       -0.938938\n",
      "1     b        1.278033\n",
      "2     b       -1.203033\n",
      "0     c        0.933371\n",
      "1     c        0.559600\n",
      "2     c        1.064502\n",
      "0     d        0.709540\n",
      "1     d        0.760230\n",
      "2     d        0.676333\n",
      "dtype: float64\n",
      "--------------------------------------------------\n",
      "cloth  size\n",
      "a      0       0.315327\n",
      "       1       0.700162\n",
      "       2      -0.647499\n",
      "b      0      -0.938938\n",
      "       1       1.278033\n",
      "       2      -1.203033\n",
      "c      0       0.933371\n",
      "       1       0.559600\n",
      "       2       1.064502\n",
      "d      0       0.709540\n",
      "       1       0.760230\n",
      "       2       0.676333\n",
      "dtype: float64\n",
      "--------------------------------------------------\n",
      "size  cloth\n",
      "0     a        0.315327\n",
      "1     a        0.700162\n",
      "2     a       -0.647499\n",
      "0     b       -0.938938\n",
      "1     b        1.278033\n",
      "2     b       -1.203033\n",
      "0     c        0.933371\n",
      "1     c        0.559600\n",
      "2     c        1.064502\n",
      "0     d        0.709540\n",
      "1     d        0.760230\n",
      "2     d        0.676333\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(ser_obj.swaplevel())\n",
    "print('-'*50)\n",
    "print(ser_obj)\n",
    "print('-'*50)\n",
    "ser_obj=ser_obj.swaplevel()\n",
    "print(ser_obj)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:51:14.941405300Z",
     "start_time": "2024-04-11T06:51:14.912422800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "size  cloth\n",
      "0     a        0.315327\n",
      "      b       -0.938938\n",
      "      c        0.933371\n",
      "      d        0.709540\n",
      "1     a        0.700162\n",
      "      b        1.278033\n",
      "      c        0.559600\n",
      "      d        0.760230\n",
      "2     a       -0.647499\n",
      "      b       -1.203033\n",
      "      c        1.064502\n",
      "      d        0.676333\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(ser_obj.sort_index(level=0))  #层级索引按那个索引级别排序,level=0表示按最外层索引排序"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:53:23.503123200Z",
     "start_time": "2024-04-11T06:53:23.466143400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cloth         a         b         c         d\n",
      "size                                         \n",
      "0      0.315327 -0.938938  0.933371  0.709540\n",
      "1      0.700162  1.278033  0.559600  0.760230\n",
      "2     -0.647499 -1.203033  1.064502  0.676333\n"
     ]
    }
   ],
   "source": [
    "#把0索引（最外层索引）变为列索引\n",
    "df_obj=ser_obj.unstack(1)  #unstack可以放索引名，或者索引位置\n",
    "print(df_obj)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:55:20.497010900Z",
     "start_time": "2024-04-11T06:55:20.474008500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cloth         a         b         c         d\n",
      "size                                         \n",
      "0      0.315327 -0.938938  0.933371  0.709540\n",
      "1      0.700162  1.278033  0.559600  0.760230\n",
      "2     -0.647499 -1.203033  1.064502  0.676333\n",
      "size  cloth\n",
      "0     a        0.315327\n",
      "      b       -0.938938\n",
      "      c        0.933371\n",
      "      d        0.709540\n",
      "1     a        0.700162\n",
      "      b        1.278033\n",
      "      c        0.559600\n",
      "      d        0.760230\n",
      "2     a       -0.647499\n",
      "      b       -1.203033\n",
      "      c        1.064502\n",
      "      d        0.676333\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print(df_obj)\n",
    "#对df进行stack，就会把行，列索引进行堆叠，变为series\n",
    "#把列索引放入内层,只能放到内层\n",
    "print(df_obj.stack())  #stack变为series和unstack保持一致的\n",
    "# df_obj=df_obj.transpose()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T06:55:55.636811Z",
     "start_time": "2024-04-11T06:55:55.623820300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
